#This file creates replicates the main findings from Dahlgaard (2016)
#at all monthly cutoff from 14 yrs to 22 yrs on Election Day

setwd("~/dropbox/own projects/turnout instrument/dataan/rd_all/replication_files")
load("mock_datas_nolag.rdata")
load("mock_ns_nolag.rdata")

## commented out for replication files 

#  install.packages("xtable")
#  install.packages("ggplot2")
#  install.packages("rmeta")
#  install.packages("dplyr")
#  install.packages("rdrobust")

library(xtable)
library(ggplot2)
library(dplyr)
library(rmeta)
library(rdrobust)

estimates <- matrix(NA, nrow = 97, ncol = 4)
estimates_robust <- matrix(NA, nrow = 97, ncol = 4)
year <- c(2009, 2013, 2014, 2015)

for (i in 97:1 ) {
  #create large dataset 
  data <- left_join(data_list[[i]], data_n_list[[i]],
                    by = c("days", "year")) 
  
  data_close <- 
    data %>%
    mutate(voted     = round(par_vote*n),
           abstained = round((1-par_vote)*n),
           treated   = days > 0) %>% 
    select(year, days, treated, voted, abstained)
  
  data_voted <- 
    as.data.frame(lapply(data_close, 
                         function(x,p) rep(x,p), 
                         data_close[["voted"]])) %>%
    mutate(voted = 1)
  
  data_abstained <- 
    as.data.frame(lapply(data_close, 
                         function(x,p) rep(x,p), 
                         data_close[["abstained"]])) %>%
    mutate(voted = 0)
  
  data <- 
    rbind(data_voted, data_abstained) # %>% sample_frac(0.1) # for fitting
  
  rd_2009 <- with(data %>% filter(year == 2009),
                  rdrobust(y = voted, x = days))
  sub_2009  <- c(rd_2009$coef[1], rd_2009$se[1])
  sub_2009_robust <- c(rd_2009$coef[3], rd_2009$se[3])
  
  rd_2013 <- with(data %>% filter(year == 2013),
                  rdrobust(y = voted, x = days))
  sub_2013 <- c(rd_2013$coef[1], rd_2013$se[1])
  sub_2013_robust <- c(rd_2013$coef[3], rd_2013$se[3])
  
  rd_2014 <- with(data %>% filter(year == 2014),
                  rdrobust(y = voted, x = days))
  sub_2014 <- c(rd_2014$coef[1], rd_2014$se[1])
  sub_2014_robust <- c(rd_2014$coef[3], rd_2014$se[3])
  
  rd_2015 <- with(data %>% filter(year == 2015),
                  rdrobust(y = voted, x = days))
  sub_2015 <- c(rd_2015$coef[1], rd_2015$se[1])
  sub_2015_robust <- c(rd_2015$coef[3], rd_2015$se[3])
  
  m_eff <- c(sub_2009[1],
             sub_2013[1],
             sub_2014[1],
             sub_2015[1])*100
  
  m_se  <- c(sub_2009[2],
             sub_2013[2],
             sub_2014[2],
             sub_2015[2])*100
  
  m_eff_robust <- c(sub_2009_robust[1],
                    sub_2013_robust[1],
                    sub_2014_robust[1],
                    sub_2015_robust[1])*100
  
  m_se_robust  <- c(sub_2009_robust[2],
                    sub_2013_robust[2],
                    sub_2014_robust[2],
                    sub_2015_robust[2])*100
  meta <- 
    meta.summaries(m_eff, m_se)
  estimates[(98-i),(1:4)] <- c(meta$summary,
                               meta$se.summary,
                               meta$summary/meta$se.summary,
                               1 - pnorm(abs(meta$summary/meta$se.summary)))
  
  meta_robust <- 
    meta.summaries(m_eff_robust, m_se_robust)
  estimates_robust[(98-i),(1:4)] <- c(meta_robust$summary,
                                      meta_robust$se.summary,
                                      meta_robust$summary/meta_robust$se.summary,
                                      1 - pnorm(abs(meta_robust$summary/meta_robust$se.summary)))
  print(i)
  print(Sys.time())
}

est_out <- 
  data.frame(rbind(estimates, estimates_robust)) 

colnames(est_out) <- c("est", "se", "t", "p")

est_out <-
  est_out %>% 
  mutate(t = abs(t),
         time = rep(-48:48, 2),
         type = rep(c("Conventional RD", "Robust RD"), each = 97) )


plac_plot <- 
  ggplot(data = est_out[c(49, 146),], 
         aes(time, 
             fit = est, 
             ymin = est + qnorm(0.975)*se, 
             ymax = est + qnorm(0.025)*se)) +
  geom_linerange(col = "grey60", cex = 2) +
  facet_grid(. ~ type) + 
  geom_point(data = est_out[-c(49, 146),], aes(x = time, y = est)) +                 
  theme_classic() +
  ylab("Estimate (in percentage points)") + 
  xlab("Age cutoff") + 
  scale_x_continuous(breaks = seq(-48, 48, 24),
                     labels = paste(seq(14, 22, 2), "years")) +
  theme(axis.text = element_text(size = 16),
        axis.title = element_text(size = 20),
        panel.spacing = unit(2, "lines")) +
  geom_hline(yintercept = 0, linetype = 3, alpha = 0.8) + 
  geom_hline(data = est_out[c(49, 146),], aes(yintercept =  est), lty = "dashed", col = "black") +
  geom_hline(data = est_out[c(49, 146),], aes(yintercept = -est), lty = "dashed", col = "black") +
  geom_hline(yintercept = (-3:3)[-4], alpha = 0.2, linetype = "dotted") +
  theme()

est_hist <-
  ggplot(est_out[-c(49, 146),], aes(est)) + 
  geom_histogram() +
  theme_classic()  +
  xlab("Estimate") +
  theme(axis.text = element_text(size = 16),
        axis.title = element_text(size = 20),
        panel.spacing = unit(2, "lines")) +
  facet_grid(. ~ type) +
  geom_vline(data = est_out[c(49, 146),], aes(xintercept =  est), lty = 2) +
  geom_vline(data = est_out[c(49, 146),], aes(xintercept = -est), lty = 2) 
  

est_hist

t_hist <-
  ggplot(est_out[-49,], aes(t)) + 
  geom_histogram() +
  theme_classic()  +
  xlab("Numeric t-value") +
  facet_grid(. ~ type) +
  theme(axis.text = element_text(size = 16),
        axis.title = element_text(size = 20),
        panel.spacing = unit(2, "lines")) +
  geom_vline(data = est_out[c(49, 146),], aes(xintercept =  t), lty = 2)

t_hist

